# Start from an empty global environment so objects left over from an earlier
# session cannot leak into the figures below.
rm(list = ls())

# Every path below is relative to the replication folder; edit this
# placeholder before running the script.
setwd("/path/to/replication/")

library(data.table)
library(zoo)
library(ggplot2)
library(strucchange)

# Figure J1: Italian Public Discourse Trends About Chinese People: Using Twitter Data

# Load the classified tweets and derive the calendar month of each tweet.
tweets <- fread(file = "./data/tweets_classified_pretrained_model.csv")

# Parse the month from the full `date` string (month/day/year), using the same
# format as the structural-break section of this script. Slicing fixed
# character positions out of `date` would yield NA for dates that are not
# zero-padded (e.g. "1/5/2020").
tweets[, month := as.yearmon(date, format = "%m/%d/%Y")]

# Figure J1 (b): monthly share of negative tweets, one line per group.
# `sentiment_negative` is averaged within each month x group cell.
monthly_share <- tweets[, .(share = mean(sentiment_negative)), by = .(month, group)]

# The three text labels are added with annotate() rather than geom_text():
# geom_text() with constant x/y/label inherits the plot data and draws the
# label once per data row, which overplots the text; annotate() draws each
# label exactly once.
fig_j1b <- ggplot(monthly_share, aes(month, share, color = group)) +
  geom_line(size = 1) +
  scale_colour_grey(start = 0.2, end = 0.6) +
  ylab("Share of negative tweets") +
  xlab(NULL) +
  # Dashed reference line at January 2020, labelled "Covid-19" just to its left.
  geom_vline(xintercept = as.yearmon("01-2020", "%m-%Y"),
             color = "blue", linetype = "dashed") +
  annotate("text", x = as.yearmon("10-2019", "%m-%Y"), y = .93,
           label = "Covid-19", color = "blue") +
  # Direct line labels replace the legend (legend.position = "none" below).
  annotate("text", x = as.yearmon("04-2018", "%m-%Y"), y = .85,
           label = "African\npeople", color = "#000000", size = 4) +
  annotate("text", x = as.yearmon("06-2018", "%m-%Y"), y = .65,
           label = "Chinese\npeople", color = "#999999", size = 4) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    axis.title.y = element_text(size = rel(1.2)),
    axis.title.x = element_text(size = rel(1.1))
  )

# Top-level reference so the figure is still displayed wherever top-level
# values are auto-printed, as it was before the plot was given a name.
fig_j1b

# Pass the plot explicitly instead of relying on ggplot2's "last plot" state.
ggsave("./tweet_share_trends_pretrained_model.pdf", plot = fig_j1b,
       width = 6, height = 4.5)

# Figure J1 (a): monthly number of total and negative tweets per group.
# Counts are computed per month x group and reshaped to long format so that
# the count type ("Total tweets" / "Negative tweets") can drive the linetype.
monthly_counts <- tweets[, .(`Total tweets` = .N,
                             `Negative tweets` = sum(sentiment_negative)),
                         by = .(month, group)]
tweets_plot <- melt(monthly_counts,
                    id.vars = c("month", "group"),
                    measure.vars = c("Total tweets", "Negative tweets"))

# The "Covid-19" label is added with annotate() rather than geom_text():
# geom_text() with constant x/y/label inherits the plot data and draws the
# label once per data row, which overplots the text; annotate() draws it once.
fig_j1a <- ggplot(tweets_plot,
                  aes(month, value, color = group, linetype = variable)) +
  geom_line(size = 1) +
  # Legend labels are given in the order of the colour scale's groups.
  scale_colour_grey(start = 0.2, end = 0.6,
                    labels = c("African\npeople", "Chinese\npeople")) +
  # Dashed reference line at January 2020, labelled "Covid-19" just to its left.
  geom_vline(xintercept = as.yearmon("01-2020", "%m-%Y"),
             color = "blue", linetype = "dashed") +
  annotate("text", x = as.yearmon("10-2019", "%m-%Y"), y = 8500,
           label = "Covid-19", color = "blue") +
  ylab("Total and negative tweets") +
  xlab(NULL) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    legend.position = "top",
    legend.title = element_blank(),
    legend.text = element_text(size = 12),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    axis.title.y = element_text(size = rel(1.2)),
    axis.title.x = element_text(size = rel(1.1))
  )

# Top-level reference so the figure is still displayed wherever top-level
# values are auto-printed, as it was before the plot was given a name.
fig_j1a

# Pass the plot explicitly instead of relying on ggplot2's "last plot" state.
ggsave("./tweet_total_negative_trends_pretrained_model.pdf", plot = fig_j1a,
       width = 6, height = 4.5)


# Figure J2: Generalized fluctuation test for structural breaks in the number of negative tweets about Chinese- and African-origin people

#### break in trends analysis
# Re-read the classified tweets so this section can be run on its own, then
# tag every tweet with its calendar month (dates are month/day/year strings).
tweets <- fread("./data/tweets_classified_pretrained_model.csv")
tweets[, month := as.yearmon(date, format = "%m/%d/%Y")]

# Figure J2 (a): structural break in the monthly number of negative tweets
# about Chinese-origin people.

# Monthly count of negative tweets in the `china` group, sorted by month.
asian <- tweets[group == "china" & sentiment_negative == 1, .N, by = month]
asian <- asian[order(month)]

# Align the counts on an explicit January 2018 - April 2020 monthly grid.
# ts() assigns dates purely by position and silently truncates or recycles the
# vector to fit start/end, so a month without any negative tweet (or a row
# with an unparsed month) would shift every later observation. Months are
# matched through the integer index 12 * year + (month - 1); months missing
# from the data get a count of zero.
win_start <- c(2018, 1)
win_end <- c(2020, 4)
grid_idx <- seq(12 * win_start[1] + win_start[2] - 1,
                12 * win_end[1] + win_end[2] - 1)
obs_idx <- round(12 * as.numeric(asian$month))
asian_n <- asian$N[match(grid_idx, obs_idx)]
asian_n[is.na(asian_n)] <- 0L
asian_ts <- ts(asian_n, start = win_start, end = win_end, frequency = 12)

# Empirical fluctuation process (OLS-based MOSUM) for a constant-mean model.
re.seat <- efp(asian_ts ~ 1, type = "OLS-MOSUM")

# Date the structural change.
# NOTE(review): `type` is not a documented argument of breakpoints() and
# appears to be ignored here; kept unchanged - confirm before removing.
bp.seat <- breakpoints(asian_ts ~ 1, type='OLS-MOSUM')

# Plot the fluctuation process with hand-placed axes and mark the single
# estimated break date in blue.
pdf("./chinese_tweets_trend_break.pdf", width = 6, height = 4.5)
plot(re.seat, xlab = NULL, main = NA, axes = FALSE,
     ylab = "Chinese negative tweets' fluctuation process")
box(bty = "l")
axis(2, c(-1.2, 0, 1.2))
axis(1, c(2018, 2019, 2020))
lines(bp.seat, breaks = 1, col = "blue")
dev.off()

# Chow test at observation 25, which is January 2020 for a monthly series
# starting in January 2018.
sctest(asian_ts ~ 1,
       type = "Chow", point = 25)

# Figure J2 (b): structural break in the monthly number of negative tweets
# about African-origin people.

# Monthly count of negative tweets in the `africa` group, sorted by month.
africa <- tweets[group == "africa" & sentiment_negative == 1, .N, by = month]
africa <- africa[order(month)]

# Align the counts on an explicit January 2018 - December 2020 monthly grid.
# ts() assigns dates purely by position and silently truncates or recycles the
# vector to fit start/end, so a month without any negative tweet (or a row
# with an unparsed month) would shift every later observation. Months are
# matched through the integer index 12 * year + (month - 1); months missing
# from the data get a count of zero.
win_start <- c(2018, 1)
win_end <- c(2020, 12)
grid_idx <- seq(12 * win_start[1] + win_start[2] - 1,
                12 * win_end[1] + win_end[2] - 1)
obs_idx <- round(12 * as.numeric(africa$month))
africa_n <- africa$N[match(grid_idx, obs_idx)]
africa_n[is.na(africa_n)] <- 0L
africa_ts <- ts(africa_n, start = win_start, end = win_end, frequency = 12)

# Empirical fluctuation process (OLS-based MOSUM) for a constant-mean model.
re.seat <- efp(africa_ts ~ 1, type = "OLS-MOSUM")

# Date the structural change.
# NOTE(review): `type` is not a documented argument of breakpoints() and
# appears to be ignored here; kept unchanged - confirm before removing.
bp.seat <- breakpoints(africa_ts ~ 1, type='OLS-MOSUM')

# Plot the fluctuation process with hand-placed axes and mark the single
# estimated break date in blue.
pdf("./african_tweets_trend_break.pdf", width = 6, height = 4.5)
plot(re.seat, xlab = NULL, main = NA, axes = FALSE,
     ylab = "African negative tweets' fluctuation process")
box(bty = "l")
axis(2, c(-1.2, 0, 1.2))
axis(1, c(2018, 2019, 2020, 2021))
lines(bp.seat, breaks = 1, col = "blue")
dev.off()

# Chow test at observation 25, which is January 2020 for a monthly series
# starting in January 2018.
sctest(africa_ts ~ 1,
       type = "Chow", point = 25)
